package maritacaclient

import (
	"fmt"
)

// StatusError is an error value made of a textual status, an error message
// and a numeric status code. Every field is optional; Error reports the most
// informative combination that is present.
//
// JSON keys: "status" (omitted when empty), "error" (always emitted) and
// "code" (omitted when zero).
type StatusError struct {
	Status       string `json:"status,omitempty"`
	ErrorMessage string `json:"error"`
	StatusCode   int    `json:"code,omitempty"`
}

// Error implements the error interface.
//
// The returned text is chosen in this order:
//  1. "<Status>: <ErrorMessage>" when both are set;
//  2. Status alone;
//  3. ErrorMessage alone;
//  4. a message carrying StatusCode when it is the only populated field;
//  5. a generic fallback when the value is entirely empty.
func (e StatusError) Error() string {
	switch {
	case e.Status != "" && e.ErrorMessage != "":
		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
	case e.Status != "":
		return e.Status
	case e.ErrorMessage != "":
		return e.ErrorMessage
	case e.StatusCode != 0:
		// Keep the only piece of information available instead of hiding
		// it behind the generic fallback.
		return fmt.Sprintf("request failed with status code %d", e.StatusCode)
	default:
		// This should not happen: the value carries no information at all.
		return "something went wrong, the maritaca API returned no error details"
	}
}

// Message is a single entry of a chat conversation: the role of its author
// and the text content. It maps to the JSON keys "role" and "content".
type Message struct {
	Role    string `json:"role"` // one of ["system", "user", "assistant"]
	Content string `json:"content"`
}

// ChatRequest bundles the model name, the conversation messages and the
// generation options into one JSON-serializable value. Options is embedded
// without a tag, so encoding/json flattens its exported fields into the same
// JSON object as the fields declared here.
//
// Stream is a pointer with omitempty, so a nil value is left out of the
// output, while Format has no omitempty and is emitted even when empty.
//
// NOTE(review): the embedded Options also declares fields tagged "model" and
// "stream". Under encoding/json's field-resolution rules the less nested
// fields declared directly on ChatRequest win, so Options.Model and
// Options.Stream are not emitted when a ChatRequest is marshalled. Likewise
// the selectors req.Model and req.Stream refer to the fields below, not to
// the ones in Options. Confirm that callers populate the intended fields.
type ChatRequest struct {
	Model    string     `json:"model"`
	Messages []*Message `json:"messages"`
	Stream   *bool      `json:"stream,omitempty"`
	Format   string     `json:"format"`
	Options
}

// ChatResponse is the decoded form of a chat reply. It maps the JSON keys
// "answer", "model", "text" and "event"; the embedded Metrics contributes the
// "usage" key to the same JSON object.
//
// NOTE(review): both Answer and Text are declared; presumably one is filled
// by batch replies and the other (together with Event) by streamed chunks.
// Confirm against the API documentation.
type ChatResponse struct {
	Answer string `json:"answer"`
	Model  string `json:"model"`
	Text   string `json:"text"`
	Event  string `json:"event,omitempty"`

	Metrics
}

// Metrics holds token accounting, serialized under the JSON key "usage".
type Metrics struct {
	// Usage groups the three token counters found under "usage".
	// NOTE(review): TotalTokens is presumably PromptTokens + CompletionTokens;
	// nothing in this file computes or verifies that.
	Usage struct {
		CompletionTokens int `json:"completion_tokens"`
		PromptTokens     int `json:"prompt_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}

// Options collects the tunable parameters of a chat request. Every field
// except Token and Model is tagged omitempty, so a field left at its Go zero
// value is dropped when the struct is marshalled with encoding/json.
//
// NOTE(review): because of omitempty, an explicit false or 0 cannot be sent
// for ChatMode, DoSample, Temperature or RepetitionPenalty. Their documented
// defaults are non-zero, so the server-side default presumably applies
// instead. Pointer types would be needed to distinguish "unset" from zero.
type Options struct {
	// Token is excluded from the JSON body (tag "-").
	// NOTE(review): presumably the API key, sent out-of-band (e.g. in a
	// header) — confirm against the code that uses it.
	Token string `json:"-"`

	// default: true
	// If true, the model will run in chat mode, where messages is a string containing the
	// user's message or a list of messages containing the iterations of the conversation
	// between user and assistant. If false, messages must be a string containing the desired prompt.
	// NOTE(review): false is dropped by omitempty, so chat mode cannot be disabled through this field.
	ChatMode bool `json:"chat_mode,omitempty"`

	// minimum: 1
	// Maximum number of tokens that will be generated by the model.
	MaxTokens int `json:"max_tokens,omitempty"`

	// Name of the model that will be used for inference. Currently, only the "sabia-2-medium" and "sabia-2-small" models are available.
	// This field has no omitempty, so it is emitted even when empty when Options is marshalled on its own.
	Model string `json:"model"`
	// default: true
	// If true, the model's generation will be sampled via top-k sampling.
	// Otherwise, the generation will always select the token with the highest probability.
	// Using do_sample=false leads to a deterministic result, but with less diversity.
	// NOTE(review): false is dropped by omitempty, so do_sample=false cannot be sent through this field.
	DoSample bool `json:"do_sample,omitempty"`

	// minimum: 0
	// default: 0.7
	// Sampling temperature (greater than or equal to zero).
	// Higher values lead to greater diversity in generation but also increase the likelihood of generating nonsensical texts.
	// Values closer to zero result in more plausible texts but increase the chances of generating repetitive texts.
	// NOTE(review): a temperature of exactly 0 is dropped by omitempty and therefore cannot be sent.
	Temperature float64 `json:"temperature,omitempty"`

	// exclusiveMaximum: 1
	// exclusiveMinimum: 0
	// default: 0.95
	// If less than 1, it retains only the top tokens with cumulative probability >= top_p (nucleus filtering).
	// For example, 0.95 means that only the tokens that make up the top 95% of the probability mass are considered when predicting the next token.
	// Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751).
	TopP float64 `json:"top_p,omitempty"`

	// minimum: 0
	// default: 1
	// Repetition penalty. Positive values encourage the model not to repeat previously generated tokens.
	// NOTE(review): a penalty of exactly 0 is dropped by omitempty and therefore cannot be sent.
	RepetitionPenalty float64 `json:"repetition_penalty,omitempty"`

	// List of tokens that, when generated, indicate that the model should stop generating tokens.
	// A nil or empty slice is omitted from the JSON output.
	StoppingTokens []string `json:"stopping_tokens,omitempty"`

	// default: false
	// If true, the model will run in streaming mode,
	// where tokens will be generated and returned to the client as they are produced.
	// If false, the model will run in batch mode, where all tokens will be generated before being returned to the client.
	Stream bool `json:"stream,omitempty"`

	// minimum: 1
	// default: 4
	// Number of tokens that will be returned per message. This field is ignored if stream=false.
	NumTokensPerMessage int `json:"num_tokens_per_message,omitempty"`
}
